package com.tl.spark.java;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

/**
 * @program: spark-test
 * @description: Word count example using the Spark Java API: reads a text
 * file, counts word occurrences, and prints them sorted by frequency.
 * @author: dong.tl
 * @create: 2018-09-12 14:59
 **/
public class JavaWordCount {
    /**
     * Counts word occurrences in a text file and prints each (word, count)
     * pair in descending order of frequency.
     *
     * @param args optional; {@code args[0]} overrides the default input file path
     */
    public static void main(String[] args) {
        // Allow the input path to be supplied on the command line; fall back
        // to the original hard-coded default for backward compatibility.
        String inputPath = args.length > 0 ? args[0] : "f:/1.txt";

        SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount").setMaster("local");

        // JavaSparkContext is Closeable: try-with-resources guarantees the
        // context is stopped even when a job throws, instead of leaking it.
        try (JavaSparkContext sparkContext = new JavaSparkContext(sparkConf)) {
            JavaRDD<String> lines = sparkContext.textFile(inputPath);

            // Split each line on single spaces into individual words.
            JavaRDD<String> words = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());

            // Pair every word with an initial count of one: (word, 1).
            JavaPairRDD<String, Integer> wordAndOne = words.mapToPair(word -> new Tuple2<>(word, 1));

            // Sum the per-word counts.
            JavaPairRDD<String, Integer> reduced = wordAndOne.reduceByKey(Integer::sum);

            // Swap to (count, word) so sortByKey orders by frequency,
            // sort descending, then swap back to (word, count).
            JavaPairRDD<String, Integer> sorted = reduced
                    .mapToPair(tuple -> tuple.swap())
                    .sortByKey(false)
                    .mapToPair(tuple -> tuple.swap());

            // Keep the lambda (not System.out::println): a method reference
            // would capture the non-serializable PrintStream receiver and
            // break Spark task serialization.
            sorted.foreach(tuple -> System.out.println(tuple));
        }
    }
}
